library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
# Placeholder globals, overwritten for every population in the loop below.
# POP: population label; make_graph() reads it when building the plot title.
POP <- ""
# TD: table of Tajima's D values (empty until data are read).
TD <- data.frame()
# s: per-chromosome summary of TD (empty until computed).
s <- data.frame()
# Build a scatter plot of Tajima's D against bin start position, with one
# facet per chromosome and horizontal reference lines taken from `s`.
#
# Arguments:
#   TD  - data frame with columns CHROM, BIN_START and TajimaD.
#   s   - data frame with columns CHROM, m, q1 and q2. Within each CHROM
#         facet, `m` is drawn as the line labelled "mean" and `q1` / `q2`
#         as the lines labelled "quantile".
#   pop - label prefixed to the plot title. Defaults to the global `POP`
#         (looked up when the function is called), so existing
#         two-argument calls keep working.
#
# Returns the ggplot object; the caller is responsible for printing it.
make_graph <- function(TD, s, pop = POP) {
  # Pad the y axis by half a unit beyond the observed extremes. This used to
  # be passed as `ylim =` to ggplot(), which silently ignores it; it has to
  # be applied through a coord (or scale) to take effect.
  y_range <- range(TD$TajimaD, na.rm = TRUE) + c(-0.5, 0.5)

  ggplot(data = TD, aes(x = BIN_START, y = TajimaD)) +
    geom_point(shape = 16, colour = alpha("black", 1 / 5)) +
    facet_wrap(~CHROM, scales = "free_x") +
    geom_hline(aes(yintercept = q1, colour = "quantile"), data = s) +
    geom_hline(aes(yintercept = q2, colour = "quantile"), data = s) +
    geom_hline(aes(yintercept = m, colour = "mean"), data = s) +
    # Name the colours so the mapping does not depend on argument order.
    scale_colour_manual(
      "",
      breaks = c("mean", "quantile"),
      values = c(mean = "blue", quantile = "red")
    ) +
    # The axis title is a plain string (previously an xlab() object was
    # passed as the scale name). Tick labels are divided by 1e6 so they
    # match the "Mbp" title.
    # NOTE(review): assumes BIN_START is in base pairs, as written by
    # vcftools --TajimaD - confirm against the .taj_d files.
    scale_x_continuous(
      name = "Chromosome Position (Mbp)",
      labels = function(x) x / 1e6
    ) +
    # coord_cartesian() zooms without dropping any points.
    coord_cartesian(ylim = y_range) +
    ylab("Tajima's D") +
    ggtitle(paste0(pop, " Tajima's D by Chromosome")) +
    theme(
      plot.background = element_rect(colour = "black", fill = NA),
      legend.position = c(0.75, 0.12)
    )
}
# Directory holding the input tables, one file per population and
# chromosome, named <POP><chromosome>.taj_d. Paths are built from this
# instead of calling setwd(), so the script no longer changes the working
# directory as a side effect.
taj_d_dir <- "~/MurrayXsan/Bioinformatics/working_dir/extract/TajD"

# For each population: load chromosomes 1-22, print a per-chromosome
# summary, and draw the faceted plot.
# NOTE: the loop variable must stay named `POP`; make_graph() picks the
# population label for the plot title up from that global.
for (POP in c("AXIOM", "OMNI", "CEU", "CHB", "CHS", "GBR", "YRI")) {
  print(POP)

  # Read every chromosome first and bind once, rather than growing TD with
  # rbind() on each iteration (which re-copies all rows read so far).
  per_chrom <- lapply(1:22, function(i) {
    read.table(
      file = file.path(taj_d_dir, paste0(POP, i, ".taj_d")),
      header = TRUE
    )
  })
  TD <- do.call(rbind, per_chrom)

  # m, q1 and q2 are the columns make_graph() draws as reference lines.
  # sd/min/max are deliberately left unnamed so the printed column headers
  # stay as they were.
  s <- TD %>%
    group_by(CHROM) %>%
    summarise(
      m = mean(TajimaD),
      sd(TajimaD),
      min(TajimaD),
      max(TajimaD),
      q1 = quantile(TajimaD, 0.01),
      q2 = quantile(TajimaD, 0.99)
    )
  print(s)

  # Explicit print() is required to render a ggplot inside a loop.
  print(make_graph(TD, s))
}
## [1] "AXIOM"
## Source: local data frame [22 x 7]
##
## CHROM m sd(TajimaD) min(TajimaD) max(TajimaD) q1 q2
## 1 1 1.058279 1.221099 -2.41387 5.25771 -1.731181 3.857003
## 2 2 1.137210 1.296195 -2.57669 5.17214 -1.841302 3.917500
## 3 3 1.049447 1.249424 -2.43823 5.00940 -1.799287 3.809078
## 4 4 1.195485 1.277510 -2.40939 5.12566 -1.808687 4.032744
## 5 5 1.218779 1.229530 -2.52491 4.90840 -1.641137 3.899892
## 6 6 1.122992 1.246940 -2.34988 5.00515 -1.728202 3.885709
## 7 7 1.154358 1.288883 -2.37870 5.09934 -1.853535 3.872779
## 8 8 1.241303 1.293007 -2.47176 4.96928 -1.754783 4.079039
## 9 9 1.017197 1.222401 -2.38939 5.04482 -1.655562 3.932412
## 10 10 1.130169 1.244277 -2.50805 4.80763 -1.757709 3.882264
## .. ... ... ... ... ... ... ...

## [1] "OMNI"
## Source: local data frame [22 x 7]
##
## CHROM m sd(TajimaD) min(TajimaD) max(TajimaD) q1 q2
## 1 1 1.179641 1.313091 -2.57726 5.38479 -1.794255 4.150716
## 2 2 1.279367 1.396452 -2.57055 5.05386 -1.971803 4.139439
## 3 3 1.155094 1.358149 -2.48649 5.43189 -1.946865 4.015615
## 4 4 1.310548 1.366403 -2.47182 5.13952 -2.022585 4.195300
## 5 5 1.377433 1.325676 -2.46235 5.77829 -1.816413 4.138347
## 6 6 1.367835 1.337405 -2.55347 5.52324 -1.915355 4.122679
## 7 7 1.378357 1.354550 -2.65073 5.36996 -1.938387 4.147289
## 8 8 1.405680 1.348083 -2.52308 5.13749 -1.906609 4.121874
## 9 9 1.151282 1.296751 -2.30045 5.33263 -1.690985 4.218668
## 10 10 1.294337 1.341359 -2.44675 5.09618 -1.804936 4.176304
## .. ... ... ... ... ... ... ...

## [1] "CEU"
## Source: local data frame [22 x 7]
##
## CHROM m sd(TajimaD) min(TajimaD) max(TajimaD) q1
## 1 1 0.8565231 1.100705 -2.64798 5.06537 -1.743686
## 2 2 0.9277225 1.096790 -2.58216 4.60973 -1.656872
## 3 3 0.9974389 1.105686 -2.47956 5.41122 -1.689959
## 4 4 1.0271121 1.142481 -2.45799 4.96464 -1.698714
## 5 5 1.0042110 1.081483 -2.26605 4.34600 -1.528696
## 6 6 1.0331145 1.080514 -2.49485 4.49871 -1.609358
## 7 7 1.0103342 1.099849 -2.34492 4.67497 -1.700800
## 8 8 0.9288049 1.135500 -2.46394 5.18678 -1.824635
## 9 9 0.7950899 1.037591 -2.37685 4.92409 -1.485879
## 10 10 0.9824180 1.096389 -2.24969 5.07686 -1.644561
## .. ... ... ... ... ... ...
## Variables not shown: q2 (dbl)

## [1] "CHB"
## Source: local data frame [22 x 7]
##
## CHROM m sd(TajimaD) min(TajimaD) max(TajimaD) q1 q2
## 1 1 1.410159 1.304827 -2.24732 5.40691 -1.463001 4.190590
## 2 2 1.513834 1.330909 -2.44921 5.28161 -1.683473 4.240492
## 3 3 1.491101 1.295906 -2.30389 5.62477 -1.584205 4.156571
## 4 4 1.669091 1.293744 -2.47443 5.49978 -1.538758 4.398708
## 5 5 1.613264 1.275396 -2.35769 5.43509 -1.453832 4.295589
## 6 6 1.609703 1.222567 -2.35785 5.29033 -1.336052 4.290732
## 7 7 1.627516 1.273563 -2.30162 5.12753 -1.502277 4.215970
## 8 8 1.683986 1.279263 -2.39828 5.13955 -1.568622 4.294316
## 9 9 1.314781 1.246523 -2.23407 5.01544 -1.321288 3.963602
## 10 10 1.569031 1.247372 -2.34669 5.65041 -1.503554 4.199709
## .. ... ... ... ... ... ... ...

## [1] "CHS"
## Source: local data frame [22 x 7]
##
## CHROM m sd(TajimaD) min(TajimaD) max(TajimaD) q1 q2
## 1 1 1.411644 1.301160 -2.25120 5.43979 -1.461741 4.217358
## 2 2 1.517107 1.334690 -2.41266 5.17482 -1.623727 4.268971
## 3 3 1.488766 1.300325 -2.41213 5.95105 -1.623663 4.217388
## 4 4 1.686436 1.305082 -2.14187 5.67374 -1.546137 4.457816
## 5 5 1.587065 1.296075 -2.30040 5.38741 -1.449092 4.351365
## 6 6 1.634972 1.255553 -2.32052 5.48588 -1.370659 4.309805
## 7 7 1.642527 1.270168 -2.18890 5.37590 -1.515004 4.257370
## 8 8 1.675892 1.286911 -2.29523 5.41696 -1.535959 4.324278
## 9 9 1.333981 1.259306 -2.43656 4.72201 -1.237503 4.021613
## 10 10 1.607835 1.247713 -1.99432 5.67257 -1.457846 4.243254
## .. ... ... ... ... ... ... ...

## [1] "GBR"
## Source: local data frame [22 x 7]
##
## CHROM m sd(TajimaD) min(TajimaD) max(TajimaD) q1
## 1 1 0.8899343 1.099841 -2.49439 4.86906 -1.704978
## 2 2 0.9469137 1.108518 -2.56737 4.48601 -1.660077
## 3 3 1.0240108 1.111766 -2.56121 5.25840 -1.682188
## 4 4 1.0734862 1.160448 -2.45368 4.90476 -1.648888
## 5 5 1.0330567 1.065542 -2.43175 4.91534 -1.472810
## 6 6 1.0672095 1.086707 -2.42006 4.85198 -1.610954
## 7 7 1.0505273 1.105998 -2.54973 4.81216 -1.737564
## 8 8 0.9614845 1.136591 -2.45889 5.00832 -1.867437
## 9 9 0.8202681 1.043071 -2.37884 4.53003 -1.508160
## 10 10 1.0098594 1.065343 -2.36969 4.90353 -1.454549
## .. ... ... ... ... ... ...
## Variables not shown: q2 (dbl)

## [1] "YRI"
## Source: local data frame [22 x 7]
##
## CHROM m sd(TajimaD) min(TajimaD) max(TajimaD) q1
## 1 1 0.4247375 0.7073689 -1.79954 4.06479 -1.116038
## 2 2 0.4552821 0.6976486 -1.90543 3.91914 -1.067957
## 3 3 0.4951910 0.7040228 -1.74635 4.32211 -1.019574
## 4 4 0.5211350 0.7533990 -1.90023 3.87774 -1.104760
## 5 5 0.4479188 0.7088040 -1.89752 4.03198 -1.078202
## 6 6 0.5091487 0.7569150 -1.70024 4.11953 -1.094817
## 7 7 0.4804271 0.7383283 -1.62320 4.33519 -1.120619
## 8 8 0.4628361 0.7014090 -1.80970 3.41125 -1.011462
## 9 9 0.4123545 0.6821319 -1.77277 3.42126 -1.051369
## 10 10 0.4961932 0.7249085 -1.87367 3.64488 -1.044511
## .. ... ... ... ... ... ...
## Variables not shown: q2 (dbl)
